from proxypool.crawlers.base import BaseCrawler
from proxypool.schemas.proxy import Proxy
from parsel import Selector

# Number of listing pages to crawl per proxy type.
MAX_PAGE = 3
# Listing URL template; stype selects the proxy category, page the page index.
BASE_URL = 'http://www.ip3366.net/free/?stype={stype}&page={page}'


class Ip3366Crawler(BaseCrawler):
    """
    Crawler for the free proxy lists on ip3366.net.

    Pages are crawled for stype 1 and 2 — per the original comment these
    are the anonymous and ordinary proxy categories (assumption from the
    site's URL scheme — TODO confirm against the site).
    """
    # Fix: the page range previously hard-coded ``range(1, 8)``, leaving the
    # MAX_PAGE constant dead; use MAX_PAGE so crawl depth is set in one place.
    urls = [BASE_URL.format(stype=stype, page=page)
            for stype in range(1, 3)
            for page in range(1, MAX_PAGE + 1)]

    ignore = True

    def parse(self, html):
        """
        Parse one listing page and yield proxies.

        :param html: raw HTML text of one ip3366.net listing page
        :yield: Proxy built from the first two cells (host, port) of each
                table row
        """
        selector = Selector(html)
        # Each table row contributes two text nodes: the IP, then the port.
        cells = selector.xpath('//tbody/tr/td[position()<3]/text()')
        # Pair adjacent nodes; zip drops a trailing unmatched node instead
        # of raising IndexError as the old step-2 index loop would.
        for host_cell, port_cell in zip(cells[::2], cells[1::2]):
            host = host_cell.get().strip()
            port = int(port_cell.get().strip())
            yield Proxy(host, port)
